In [2]:
!pip install pandas
!pip install plotly
!pip install dash
!pip install dash_bootstrap_components
Requirement already satisfied: pandas in c:\users\omate\anaconda3\lib\site-packages (1.1.3)
Requirement already satisfied: pytz>=2017.2 in c:\users\omate\anaconda3\lib\site-packages (from pandas) (2020.1)
Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\omate\anaconda3\lib\site-packages (from pandas) (2.8.1)
Requirement already satisfied: numpy>=1.15.4 in c:\users\omate\anaconda3\lib\site-packages (from pandas) (1.19.2)
Requirement already satisfied: six>=1.5 in c:\users\omate\anaconda3\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
Requirement already satisfied: plotly in c:\users\omate\anaconda3\lib\site-packages (5.8.0)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\omate\anaconda3\lib\site-packages (from plotly) (8.0.1)
Requirement already satisfied: dash in c:\users\omate\anaconda3\lib\site-packages (2.5.0)
Requirement already satisfied: Flask>=1.0.4 in c:\users\omate\anaconda3\lib\site-packages (from dash) (1.1.2)
Requirement already satisfied: flask-compress in c:\users\omate\anaconda3\lib\site-packages (from dash) (1.12)
Requirement already satisfied: dash-html-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (2.0.0)
Requirement already satisfied: plotly>=5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (5.8.0)
Requirement already satisfied: dash-table==5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (5.0.0)
Requirement already satisfied: dash-core-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (2.0.0)
Requirement already satisfied: Werkzeug>=0.15 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (1.0.1)
Requirement already satisfied: itsdangerous>=0.24 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (1.1.0)
Requirement already satisfied: click>=5.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (7.1.2)
Requirement already satisfied: Jinja2>=2.10.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (2.11.2)
Requirement already satisfied: brotli in c:\users\omate\anaconda3\lib\site-packages (from flask-compress->dash) (1.0.9)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\omate\anaconda3\lib\site-packages (from plotly>=5.0.0->dash) (8.0.1)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\omate\anaconda3\lib\site-packages (from Jinja2>=2.10.1->Flask>=1.0.4->dash) (1.1.1)
Requirement already satisfied: dash_bootstrap_components in c:\users\omate\anaconda3\lib\site-packages (1.1.0)
Requirement already satisfied: dash>=2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash_bootstrap_components) (2.5.0)
Requirement already satisfied: dash-core-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (2.0.0)
Requirement already satisfied: dash-table==5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (5.0.0)
Requirement already satisfied: flask-compress in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (1.12)
Requirement already satisfied: Flask>=1.0.4 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (1.1.2)
Requirement already satisfied: plotly>=5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (5.8.0)
Requirement already satisfied: dash-html-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (2.0.0)
Requirement already satisfied: brotli in c:\users\omate\anaconda3\lib\site-packages (from flask-compress->dash>=2.0.0->dash_bootstrap_components) (1.0.9)
Requirement already satisfied: Jinja2>=2.10.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (2.11.2)
Requirement already satisfied: itsdangerous>=0.24 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (1.1.0)
Requirement already satisfied: Werkzeug>=0.15 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (1.0.1)
Requirement already satisfied: click>=5.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (7.1.2)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\omate\anaconda3\lib\site-packages (from plotly>=5.0.0->dash>=2.0.0->dash_bootstrap_components) (8.0.1)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\omate\anaconda3\lib\site-packages (from Jinja2>=2.10.1->Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (1.1.1)
In [3]:
import pandas as pd
df=pd.read_csv("C:\\Users\\omate\\Downloads\\india___monthly_rainfall_data___1901_to_2002 (1).csv")
df.head()
Out[3]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup
0 Andaman & Nicobar Islands Andaman NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Andaman & Nicobar IslandsAndaman
1 Andaman & Nicobar Islands Nicobar NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Andaman & Nicobar IslandsNicobar
2 Andhra Pradesh Adilabad 1901.0 6.725 10.488 23.288 35.560 23.119 115.546 294.119 276.865 181.615 47.310 1.339 0.000 Andhra PradeshAdilabad
3 Andhra Pradesh Adilabad 1902.0 0.420 0.000 0.388 6.070 3.331 45.960 233.973 167.971 198.177 26.447 35.083 11.222 Andhra PradeshAdilabad
4 Andhra Pradesh Adilabad 1903.0 6.643 1.956 0.173 4.551 33.348 132.078 436.611 334.544 226.037 138.818 14.095 8.823 Andhra PradeshAdilabad
In [4]:
df=df.iloc[2:]
df
Out[4]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup
2 Andhra Pradesh Adilabad 1901.0 6.725 10.488 23.288 35.560 23.119 115.546 294.119 276.865 181.615 47.310 1.339 0.000 Andhra PradeshAdilabad
3 Andhra Pradesh Adilabad 1902.0 0.420 0.000 0.388 6.070 3.331 45.960 233.973 167.971 198.177 26.447 35.083 11.222 Andhra PradeshAdilabad
4 Andhra Pradesh Adilabad 1903.0 6.643 1.956 0.173 4.551 33.348 132.078 436.611 334.544 226.037 138.818 14.095 8.823 Andhra PradeshAdilabad
5 Andhra Pradesh Adilabad 1904.0 0.054 0.121 11.446 0.017 16.900 131.048 160.694 81.865 251.577 110.391 0.146 0.130 Andhra PradeshAdilabad
6 Andhra Pradesh Adilabad 1905.0 0.589 2.293 8.252 35.020 17.569 79.937 96.331 313.522 361.697 4.950 0.146 0.000 Andhra PradeshAdilabad
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55314 West Bengal North Dinajpur 2000.0 11.294 10.908 10.686 27.669 110.618 264.845 185.798 297.535 280.883 22.048 0.568 0.423 West BengalNorth Dinajpur
55315 West Bengal North Dinajpur 2001.0 1.866 4.048 21.805 36.436 152.242 164.361 311.196 271.373 165.015 124.258 2.798 0.000 West BengalNorth Dinajpur
55316 West Bengal North Dinajpur 2002.0 14.939 3.758 12.410 54.591 80.993 189.604 276.109 285.924 215.591 108.733 17.757 0.000 West BengalNorth Dinajpur
55317 Lakshadweep Lakshadweep NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN LakshadweepLakshadweep
55318 Goa Goa NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GoaGoa

55317 rows × 16 columns

In [5]:
print("Null values before processing:")
df.isnull().sum()
Null values before processing:
Out[5]:
State         0
District      0
Year         33
Jan          33
Feb          33
Mar          33
Apr          33
May          33
Jun          33
Jul          33
Aug          33
Sep          33
Oct          33
Nov          33
Dec          33
vlookup     102
dtype: int64
In [6]:
df[df[['Year']].isna ().any (axis=1)]
Out[6]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup
11222 Daman & Diu Daman NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Daman & DiuDaman
11223 Daman & Diu Diu NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Daman & DiuDiu
12142 Gujarat Jamnagar NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GujaratJamnagar
12143 Gujarat Junagadh NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GujaratJunagadh
12552 Gujarat Navsari NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GujaratNavsari
12757 Gujarat Porbandar NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GujaratPorbandar
13370 Gujarat Valsad NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GujaratValsad
20715 Karnataka Dakshina Kannada NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KarnatakaDakshina Kannada
22144 Karnataka Udupi NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KarnatakaUdupi
22145 Karnataka Uttara Kannada NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KarnatakaUttara Kannada
22146 Kerala Alappuzha NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaAlappuzha
22147 Kerala Ernakulam NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaErnakulam
22250 Kerala Kannur NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaKannur
22251 Kerala Kasaragod NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaKasaragod
22252 Kerala Kollam NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaKollam
22253 Kerala Kottayam NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaKottayam
22254 Kerala Kozhikode NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaKozhikode
22255 Kerala Malappuram NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaMalappuram
22256 Kerala Palakkad NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaPalakkad
22257 Kerala Pathanamthitta NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaPathanamthitta
22258 Kerala Thiruvananthapuram NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaThiruvananthapuram
22259 Kerala Thrissur NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaThrissur
22260 Kerala Wayanad NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN KeralaWayanad
28483 Maharashtra Mumbai NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN MaharashtraMumbai
28484 Maharashtra Mumbai (Suburban) NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN MaharashtraMumbai (Suburban)
29199 Maharashtra Raigarh NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN MaharashtraRaigarh
29200 Maharashtra Ratnagiri NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN MaharashtraRatnagiri
29405 Maharashtra Sindhudurg NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN MaharashtraSindhudurg
29508 Maharashtra Thane NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN MaharashtraThane
35935 Pondicherry Mahe NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN PondicherryMahe
42464 Tamil Nadu Kanniyakumari NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN Tamil NaduKanniyakumari
55317 Lakshadweep Lakshadweep NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN LakshadweepLakshadweep
55318 Goa Goa NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GoaGoa
In [7]:
len(df[df[['Year']].isna ().any (axis=1)])
Out[7]:
33
In [8]:
df=df[~df[['Year']].isna ().any (axis=1)]
df
Out[8]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup
2 Andhra Pradesh Adilabad 1901.0 6.725 10.488 23.288 35.560 23.119 115.546 294.119 276.865 181.615 47.310 1.339 0.000 Andhra PradeshAdilabad
3 Andhra Pradesh Adilabad 1902.0 0.420 0.000 0.388 6.070 3.331 45.960 233.973 167.971 198.177 26.447 35.083 11.222 Andhra PradeshAdilabad
4 Andhra Pradesh Adilabad 1903.0 6.643 1.956 0.173 4.551 33.348 132.078 436.611 334.544 226.037 138.818 14.095 8.823 Andhra PradeshAdilabad
5 Andhra Pradesh Adilabad 1904.0 0.054 0.121 11.446 0.017 16.900 131.048 160.694 81.865 251.577 110.391 0.146 0.130 Andhra PradeshAdilabad
6 Andhra Pradesh Adilabad 1905.0 0.589 2.293 8.252 35.020 17.569 79.937 96.331 313.522 361.697 4.950 0.146 0.000 Andhra PradeshAdilabad
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55312 West Bengal North Dinajpur 1998.0 3.676 16.042 15.435 43.497 189.031 101.695 275.914 316.537 262.286 256.652 15.486 0.000 West BengalNorth Dinajpur
55313 West Bengal North Dinajpur 1999.0 7.867 2.932 2.066 3.020 151.115 214.111 380.077 375.183 232.015 85.839 7.591 0.255 West BengalNorth Dinajpur
55314 West Bengal North Dinajpur 2000.0 11.294 10.908 10.686 27.669 110.618 264.845 185.798 297.535 280.883 22.048 0.568 0.423 West BengalNorth Dinajpur
55315 West Bengal North Dinajpur 2001.0 1.866 4.048 21.805 36.436 152.242 164.361 311.196 271.373 165.015 124.258 2.798 0.000 West BengalNorth Dinajpur
55316 West Bengal North Dinajpur 2002.0 14.939 3.758 12.410 54.591 80.993 189.604 276.109 285.924 215.591 108.733 17.757 0.000 West BengalNorth Dinajpur

55284 rows × 16 columns

In [9]:
df['vlookup'] = df['vlookup'].fillna(df['State']+df['District'])
df
<ipython-input-9-3de84ecf340c>:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['vlookup'] = df['vlookup'].fillna(df['State']+df['District'])
Out[9]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup
2 Andhra Pradesh Adilabad 1901.0 6.725 10.488 23.288 35.560 23.119 115.546 294.119 276.865 181.615 47.310 1.339 0.000 Andhra PradeshAdilabad
3 Andhra Pradesh Adilabad 1902.0 0.420 0.000 0.388 6.070 3.331 45.960 233.973 167.971 198.177 26.447 35.083 11.222 Andhra PradeshAdilabad
4 Andhra Pradesh Adilabad 1903.0 6.643 1.956 0.173 4.551 33.348 132.078 436.611 334.544 226.037 138.818 14.095 8.823 Andhra PradeshAdilabad
5 Andhra Pradesh Adilabad 1904.0 0.054 0.121 11.446 0.017 16.900 131.048 160.694 81.865 251.577 110.391 0.146 0.130 Andhra PradeshAdilabad
6 Andhra Pradesh Adilabad 1905.0 0.589 2.293 8.252 35.020 17.569 79.937 96.331 313.522 361.697 4.950 0.146 0.000 Andhra PradeshAdilabad
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55312 West Bengal North Dinajpur 1998.0 3.676 16.042 15.435 43.497 189.031 101.695 275.914 316.537 262.286 256.652 15.486 0.000 West BengalNorth Dinajpur
55313 West Bengal North Dinajpur 1999.0 7.867 2.932 2.066 3.020 151.115 214.111 380.077 375.183 232.015 85.839 7.591 0.255 West BengalNorth Dinajpur
55314 West Bengal North Dinajpur 2000.0 11.294 10.908 10.686 27.669 110.618 264.845 185.798 297.535 280.883 22.048 0.568 0.423 West BengalNorth Dinajpur
55315 West Bengal North Dinajpur 2001.0 1.866 4.048 21.805 36.436 152.242 164.361 311.196 271.373 165.015 124.258 2.798 0.000 West BengalNorth Dinajpur
55316 West Bengal North Dinajpur 2002.0 14.939 3.758 12.410 54.591 80.993 189.604 276.109 285.924 215.591 108.733 17.757 0.000 West BengalNorth Dinajpur

55284 rows × 16 columns

In [10]:
print("Null values after processing:")
df.isnull().sum()
Null values after processing:
Out[10]:
State       0
District    0
Year        0
Jan         0
Feb         0
Mar         0
Apr         0
May         0
Jun         0
Jul         0
Aug         0
Sep         0
Oct         0
Nov         0
Dec         0
vlookup     0
dtype: int64
In [11]:
print("Co-Variance Matrix")
df.cov()
Co-Variance Matrix
Out[11]:
Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
Year 866.932348 3.472550 -30.216839 19.364439 -10.206869 14.997325 -146.250203 -94.709961 -46.587178 -45.853489 72.554484 -22.358163 23.882629
Jan 3.472550 335.401558 102.801691 104.282866 78.556940 7.485957 -144.002417 -130.411822 124.228681 4.702196 -49.933445 14.990472 54.653387
Feb -30.216839 102.801691 494.028537 276.304620 426.575517 668.825260 840.584787 504.102901 658.703433 362.480899 230.412530 -10.941797 23.717749
Mar 19.364439 104.282866 276.304620 1187.148176 1332.556364 2362.848442 3081.834649 2075.056854 1771.536503 1181.040630 835.525463 64.004500 39.115240
Apr -10.206869 78.556940 426.575517 1332.556364 4838.730654 6570.511426 8906.494022 5786.318829 3866.553586 3250.033571 2935.008564 664.510032 115.421430
May 14.997325 7.485957 668.825260 2362.848442 6570.511426 16950.075713 18660.285660 12565.582830 9052.677761 7052.653334 5957.579351 1045.412087 140.574974
Jun -146.250203 -144.002417 840.584787 3081.834649 8906.494022 18660.285660 40819.594550 26031.017208 18606.913694 12937.462090 8566.109179 931.692439 -327.454205
Jul -94.709961 -130.411822 504.102901 2075.056854 5786.318829 12565.582830 26031.017208 41584.688033 20669.012821 13780.762233 4777.533129 -1036.934627 -838.736478
Aug -46.587178 124.228681 658.703433 1771.536503 3866.553586 9052.677761 18606.913694 20669.012821 27412.086234 10298.077031 3085.359300 -1039.512149 -686.295322
Sep -45.853489 4.702196 362.480899 1181.040630 3250.033571 7052.653334 12937.462090 13780.762233 10298.077031 16044.794407 3695.459896 -65.476695 -261.389108
Oct 72.554484 -49.933445 230.412530 835.525463 2935.008564 5957.579351 8566.109179 4777.533129 3085.359300 3695.459896 8710.970814 2173.239251 595.658085
Nov -22.358163 14.990472 -10.941797 64.004500 664.510032 1045.412087 931.692439 -1036.934627 -1039.512149 -65.476695 2173.239251 3390.425621 800.637200
Dec 23.882629 54.653387 23.717749 39.115240 115.421430 140.574974 -327.454205 -838.736478 -686.295322 -261.389108 595.658085 800.637200 954.038586
In [12]:
print("Correlation Matrix")
df.corr()
Correlation Matrix
Out[12]:
Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
Year 1.000000 0.006440 -0.046172 0.019088 -0.004983 0.003912 -0.024585 -0.015774 -0.009557 -0.012295 0.026402 -0.013041 0.026261
Jan 0.006440 1.000000 0.252547 0.165264 0.061665 0.003140 -0.038918 -0.034919 0.040970 0.002027 -0.029213 0.014057 0.096617
Feb -0.046172 0.252547 1.000000 0.360794 0.275902 0.231127 0.187185 0.111218 0.178996 0.128748 0.111070 -0.008454 0.034547
Mar 0.019088 0.165264 0.360794 1.000000 0.555991 0.526742 0.442713 0.295332 0.310546 0.270611 0.259821 0.031903 0.036755
Apr -0.004983 0.061665 0.275902 0.555991 1.000000 0.725517 0.633734 0.407915 0.335728 0.368854 0.452075 0.164062 0.053720
May 0.003912 0.003140 0.231127 0.526742 0.725517 1.000000 0.709411 0.473293 0.419972 0.427661 0.490287 0.137903 0.034957
Jun -0.024585 -0.038918 0.187185 0.442713 0.633734 0.709411 1.000000 0.631815 0.556249 0.505531 0.454272 0.079197 -0.052473
Jul -0.015774 -0.034919 0.111218 0.295332 0.407915 0.473293 0.631815 1.000000 0.612184 0.533506 0.251017 -0.087329 -0.133161
Aug -0.009557 0.040970 0.178996 0.310546 0.335728 0.419972 0.556249 0.612184 1.000000 0.491041 0.199665 -0.107828 -0.134201
Sep -0.012295 0.002027 0.128748 0.270611 0.368854 0.427661 0.505531 0.533506 0.491041 1.000000 0.312585 -0.008878 -0.066809
Oct 0.026402 -0.029213 0.111070 0.259821 0.452075 0.490287 0.454272 0.251017 0.199665 0.312585 1.000000 0.399896 0.206624
Nov -0.013041 0.014057 -0.008454 0.031903 0.164062 0.137903 0.079197 -0.087329 -0.107828 -0.008878 0.399896 1.000000 0.445170
Dec 0.026261 0.096617 0.034547 0.036755 0.053720 0.034957 -0.052473 -0.133161 -0.134201 -0.066809 0.206624 0.445170 1.000000
In [13]:
df['mean_rainfall']=df.iloc[:,3:15].mean(axis=1)
df
<ipython-input-13-f873d37c5629>:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['mean_rainfall']=df.iloc[:,3:15].mean(axis=1)
Out[13]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup mean_rainfall
2 Andhra Pradesh Adilabad 1901.0 6.725 10.488 23.288 35.560 23.119 115.546 294.119 276.865 181.615 47.310 1.339 0.000 Andhra PradeshAdilabad 84.664500
3 Andhra Pradesh Adilabad 1902.0 0.420 0.000 0.388 6.070 3.331 45.960 233.973 167.971 198.177 26.447 35.083 11.222 Andhra PradeshAdilabad 60.753500
4 Andhra Pradesh Adilabad 1903.0 6.643 1.956 0.173 4.551 33.348 132.078 436.611 334.544 226.037 138.818 14.095 8.823 Andhra PradeshAdilabad 111.473083
5 Andhra Pradesh Adilabad 1904.0 0.054 0.121 11.446 0.017 16.900 131.048 160.694 81.865 251.577 110.391 0.146 0.130 Andhra PradeshAdilabad 63.699083
6 Andhra Pradesh Adilabad 1905.0 0.589 2.293 8.252 35.020 17.569 79.937 96.331 313.522 361.697 4.950 0.146 0.000 Andhra PradeshAdilabad 76.692167
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55312 West Bengal North Dinajpur 1998.0 3.676 16.042 15.435 43.497 189.031 101.695 275.914 316.537 262.286 256.652 15.486 0.000 West BengalNorth Dinajpur 124.687583
55313 West Bengal North Dinajpur 1999.0 7.867 2.932 2.066 3.020 151.115 214.111 380.077 375.183 232.015 85.839 7.591 0.255 West BengalNorth Dinajpur 121.839250
55314 West Bengal North Dinajpur 2000.0 11.294 10.908 10.686 27.669 110.618 264.845 185.798 297.535 280.883 22.048 0.568 0.423 West BengalNorth Dinajpur 101.939583
55315 West Bengal North Dinajpur 2001.0 1.866 4.048 21.805 36.436 152.242 164.361 311.196 271.373 165.015 124.258 2.798 0.000 West BengalNorth Dinajpur 104.616500
55316 West Bengal North Dinajpur 2002.0 14.939 3.758 12.410 54.591 80.993 189.604 276.109 285.924 215.591 108.733 17.757 0.000 West BengalNorth Dinajpur 105.034083

55284 rows × 17 columns

In [14]:
import matplotlib.pyplot as plt

a = df.groupby('State').mean()
plt.figure(figsize=(16,6),dpi=80)
plt.xticks(rotation=90)
plt.plot(a['mean_rainfall'],label='mean_rainfall')
plt.legend(loc='best')
plt.title("Mean rainfall by State")
Out[14]:
Text(0.5, 1.0, 'Mean rainfall by State')
In [15]:
df['Dec-Feb']=df[['Dec','Jan','Feb']].sum(axis=1)
df['Mar-Jun']=df[['Mar','Apr','May','Jun']].sum(axis=1)
df['Jul-Nov']=df[['Jul','Aug','Sep','Oct','Nov']].sum(axis=1)

df
<ipython-input-15-bf2e370200f1>:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Dec-Feb']=df[['Dec','Jan','Feb']].sum(axis=1)
<ipython-input-15-bf2e370200f1>:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Mar-Jun']=df[['Mar','Apr','May','Jun']].sum(axis=1)
<ipython-input-15-bf2e370200f1>:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Jul-Nov']=df[['Jul','Aug','Sep','Oct','Nov']].sum(axis=1)
Out[15]:
State District Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec vlookup mean_rainfall Dec-Feb Mar-Jun Jul-Nov
2 Andhra Pradesh Adilabad 1901.0 6.725 10.488 23.288 35.560 23.119 115.546 294.119 276.865 181.615 47.310 1.339 0.000 Andhra PradeshAdilabad 84.664500 17.213 197.513 801.248
3 Andhra Pradesh Adilabad 1902.0 0.420 0.000 0.388 6.070 3.331 45.960 233.973 167.971 198.177 26.447 35.083 11.222 Andhra PradeshAdilabad 60.753500 11.642 55.749 661.651
4 Andhra Pradesh Adilabad 1903.0 6.643 1.956 0.173 4.551 33.348 132.078 436.611 334.544 226.037 138.818 14.095 8.823 Andhra PradeshAdilabad 111.473083 17.422 170.150 1150.105
5 Andhra Pradesh Adilabad 1904.0 0.054 0.121 11.446 0.017 16.900 131.048 160.694 81.865 251.577 110.391 0.146 0.130 Andhra PradeshAdilabad 63.699083 0.305 159.411 604.673
6 Andhra Pradesh Adilabad 1905.0 0.589 2.293 8.252 35.020 17.569 79.937 96.331 313.522 361.697 4.950 0.146 0.000 Andhra PradeshAdilabad 76.692167 2.882 140.778 776.646
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55312 West Bengal North Dinajpur 1998.0 3.676 16.042 15.435 43.497 189.031 101.695 275.914 316.537 262.286 256.652 15.486 0.000 West BengalNorth Dinajpur 124.687583 19.718 349.658 1126.875
55313 West Bengal North Dinajpur 1999.0 7.867 2.932 2.066 3.020 151.115 214.111 380.077 375.183 232.015 85.839 7.591 0.255 West BengalNorth Dinajpur 121.839250 11.054 370.312 1080.705
55314 West Bengal North Dinajpur 2000.0 11.294 10.908 10.686 27.669 110.618 264.845 185.798 297.535 280.883 22.048 0.568 0.423 West BengalNorth Dinajpur 101.939583 22.625 413.818 786.832
55315 West Bengal North Dinajpur 2001.0 1.866 4.048 21.805 36.436 152.242 164.361 311.196 271.373 165.015 124.258 2.798 0.000 West BengalNorth Dinajpur 104.616500 5.914 374.844 874.640
55316 West Bengal North Dinajpur 2002.0 14.939 3.758 12.410 54.591 80.993 189.604 276.109 285.924 215.591 108.733 17.757 0.000 West BengalNorth Dinajpur 105.034083 18.697 337.598 904.114

55284 rows × 20 columns

In [16]:
plt.figure(figsize=(16,6),dpi=80)
plt.xticks(rotation=90)
a = df.groupby('State').mean()
plt.plot(a['Dec-Feb'],label='Dec-Feb')
plt.plot(a['Mar-Jun'],label='Mar-Jun')
plt.plot(a['Jul-Nov'],label='Jul-Nov')
plt.legend(loc='best')
plt.title("Seasonal variation in rainfall for different states")
Out[16]:
Text(0.5, 1.0, 'Seasonal variation in rainfall for different states')
In [17]:
bplot = df[['State', 'Dec-Feb', 'Mar-Jun','Jul-Nov']].groupby(df['State']).sum().plot.bar(stacked=True,figsize=(20,12))
print("Stacked Bar Graph for Rainfall in Different States")
Stacked Bar Graph for Rainfall in Different States
In [18]:
df[['State', 'Dec-Feb', 'Mar-Jun','Jul-Nov']].groupby(df['District']).sum()
Out[18]:
Dec-Feb Mar-Jun Jul-Nov
District
Adilabad 1855.857 19259.712 80664.710
Agra 2206.319 7091.313 67288.460
Ahmadabad 263.360 9924.743 51085.914
Ahmadnagar 803.282 20929.584 64834.537
Aizwal 4907.794 116523.837 156655.649
... ... ... ...
Wokha 6033.575 95579.918 114503.666
Yamunanagar 5505.292 11308.169 54242.271
Yanam 4348.600 22870.200 93319.200
Yavatmal 2321.037 18040.780 78974.286
Zunheboto 5539.647 85844.541 116013.998

540 rows × 3 columns

In [19]:
import seaborn as sns

fig = plt.figure(figsize=(300, 20))
plt.xticks(rotation='vertical')
sns.boxplot(x='District', y='mean_rainfall', data=df)
plt.title("Mean rainfall for each State")

fig.update_layout(xaxis=dict(rangeslider=dict(visible=True),type="linear"))
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-32ac5e3504cf> in <module>
      6 plt.title("Mean rainfall for each State")
      7 
----> 8 fig.update_layout(xaxis=dict(rangeslider=dict(visible=True),type="linear"))

AttributeError: 'Figure' object has no attribute 'update_layout'
In [20]:
import plotly.express as px
fig = px.line_polar(df, r="mean_rainfall",theta="State",
                    color='Year' ,line_close=True,
                    color_discrete_sequence=px.colors.sequential.Plasma_r,
                    template="plotly_dark")
fig.show()
In [21]:
fig = px.scatter_polar(df, r="mean_rainfall", theta="State",
                       color="mean_rainfall", symbol="Year", size="mean_rainfall",
                       color_discrete_sequence=px.colors.sequential.Plasma_r)

fig.show()
In [22]:
px.scatter(df, x="Year", 
           y="mean_rainfall", animation_frame="Year", animation_group="State",
           size="mean_rainfall", color="State", hover_name="State", 
           title='Mean Rainfall of each State from years 1900 to 2002',
           log_x=True, size_max=50, range_x=[1899,2004],range_y=[0,300])
In [23]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

districts = df['District'].unique()

np.random.seed(100)
mycolors = np.random.choice(list(mpl.colors.XKCD_COLORS.keys()), len(districts), replace=False)

plt.figure(figsize=(16,12), dpi= 80)
for i, y in enumerate(districts):
    if i > 0:
        plt.plot('Year', 'mean_rainfall', data=df.loc[df.District==y, :][['Year','mean_rainfall']], color=mycolors[i], label=y)
        plt.text(df.loc[df.District==y, 'Year'][-1:].values[0]+3, df.loc[df.District==y, 'mean_rainfall'][-1:].values[0], y, fontsize=12, color=mycolors[i])

plt.gca().set(xlim=(1900, 2003), ylim=(0,1000), ylabel='$Mean Rainfall$', xlabel='$Year$')
plt.yticks(fontsize=12, alpha=.7)
plt.title("Time Series of Rainfall Data from 1900 to 2002 for all cities")
plt.show()
In [24]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

districts = df['District'].unique()

np.random.seed(2)
mycolors = np.random.choice(list(mpl.colors.XKCD_COLORS.keys()), len(districts), replace=False)

plt.figure(figsize=(16,12), dpi= 80)
for i, y in enumerate(districts[:5]):
    plt.plot('Year', 'mean_rainfall', data=df.loc[df.District==y, :][['Year','mean_rainfall']], color=mycolors[i], label=y)
    plt.text(df.loc[df.District==y, 'Year'][-1:].values[0]+3, df.loc[df.District==y, 'mean_rainfall'][-1:].values[0], y, fontsize=12, color=mycolors[i])

        
plt.gca().set(xlim=(1900, 2003), ylim=(0,150), ylabel='$Mean Rainfall$', xlabel='$Year$')
plt.yticks(fontsize=12, alpha=.7)
plt.title("Time Series of Rainfall Data from 1900 to 2002 for 10 cities")
plt.show()
In [ ]: